library(lme4)
library(lmerTest)
library(psych)
library(MuMIn)
library(quanteda.textstats)
library(ggplot2)

# Load the LIWC-22 results export; the columns used further down (headline,
# Dic, Analytic, character_count, clicks_per_impression, clickability_test_id)
# are expected to be present in this file.
upworthy <- read.csv(
  file = "LIWC-22 Results - upworthy-archive-confirmatory-___ - LIWC Analysis.csv"
)

# Score every headline for readability. textstat_readability() returns a data
# frame, so it is stored as a nested data-frame column and its Flesch score is
# reached as upworthy$readability$Flesch.
upworthy$readability <- textstat_readability(
  x = upworthy$headline,
  measure = "Flesch"
)

# Simplicity index: sum of z-scored components, with Flesch readability and
# Dic entering positively and Analytic and character count entering negatively.
#
# scale() returns a one-column matrix with "scaled:center"/"scaled:scale"
# attributes, and arithmetic on those matrices keeps the first term's
# attributes. as.numeric() stores the index as a plain numeric vector instead
# of a matrix column labelled with centering/scaling values that belong only to
# the Flesch term.
upworthy$simplicity <- as.numeric(
  scale(upworthy$readability$Flesch) +
    scale(upworthy$Dic) -
    scale(upworthy$Analytic) -
    scale(upworthy$character_count)
)

# Linear mixed model: log click-through rate predicted by the simplicity index,
# with a random intercept for each clickability_test_id.
#
# The 0.001 offset keeps the log finite when clicks_per_impression is zero.
# Every variable is referenced by bare column name so that it is resolved
# through `data =`; writing `upworthy$...` inside the formula would pin the
# response to whatever the global data frame holds at evaluation time, which
# breaks refits and predictions on filtered or new data.
simple_language <- lmer(
  log(clicks_per_impression + 0.001) ~ simplicity + (1 | clickability_test_id),
  data = upworthy
)

# Fixed-effect estimates and tests, then R-squared for the mixed model (MuMIn).
summary(simple_language)
r.squaredGLMM(simple_language)

# Histogram of the composite simplicity index.
simp <- ggplot(data = upworthy, mapping = aes(x = simplicity)) +
  geom_histogram(fill = "#a00000") +
  labs(x = "Simplicity Index") +
  theme_classic()

# Histogram of the Flesch readability score.
#
# The score lives in the nested `readability` data-frame column, so it is
# reached as readability$Flesch and resolved against the plot's own data.
# Writing upworthy$readability$Flesch inside aes() would instead re-read the
# global data frame when the plot is built, which fails or misaligns if
# `upworthy` has been changed in the meantime.
read <- ggplot(upworthy, aes(x = readability$Flesch)) +
  geom_histogram(fill = "#1a80bb") +
  xlab("Readability") +
  theme_classic()

# Histogram of the Dic column, labelled "Common Words" on the x axis.
comm <- ggplot(data = upworthy, mapping = aes(x = Dic)) +
  geom_histogram(fill = "#384860") +
  labs(x = "Common Words") +
  theme_classic()

# Histogram of the Analytic column, labelled "Analytic Writing" on the x axis.
analytic <- ggplot(data = upworthy, mapping = aes(x = Analytic)) +
  geom_histogram(fill = "#ea801c") +
  labs(x = "Analytic Writing") +
  theme_classic()

# Histogram of the character_count column.
ccount <- ggplot(data = upworthy, mapping = aes(x = character_count)) +
  geom_histogram(fill = "#f2c45f") +
  labs(x = "Character Count") +
  theme_classic()
